perm filename SSORT.SAI[4,KMC] blob sn#170160 filedate 1975-09-17 generic text, type T, neo UTF8
BEGIN "SSORT"
COMMENT //THIS VERSION OF THE SOURCE PROGRAM WAS PREPARED FROM
  THE OLD VERSION BY DOING SRCCOM WITH THE NEW BUGGY VERSION.  THE "S" COMMAND FOR
  SUBSTITUTE WAS IMPLEMENTED, BUT THE "X" AND "U" FOR CROSSWORD AND
  UNSORT WERE NOT INCLUDED BECAUSE OF BUGS IN THE CODE;
COMMENT //MORE EDITING 74.D.07 TO COUNT PAGE FAULTS AND WARN USER//;
DEFINE MERGESORT="FALSE";COMMENT Future "other" way to do the sorting;
DEFINE FF="'14",VT="'13",CR="'15",LF="'12",CRLF="CR&LF";

REQUIRE "REMLIB[S,REM]" LOAD_MODULE;
EXTERNAL INTEGER PROCEDURE RANDOM(INTEGER LIM);

DEFINE SECZ="30";
INTEGER ARRAY SECAS[0:SECZ-1],SECS[0:SECZ*'200-1];
INTEGER CELL;
INTEGER ARRAY TABLE[0:'177];
  COMMENT Numbers in TABLE have the following meaning for the ascii character:
  -1 = delete this character whenever it occurs
   0 = normal character of text
   1 = delimiter character;
INTEGER I,OBUF,WORD,ON,BYT,IN,TOTSTR,DELIMW,LONGNW;
INTEGER ICHAN,IBRK,OCHAN,OBRK,DCHAN,DBRK;
BOOLEAN IEOF,OEOF,IFLG,OFLG,DFLG,DEOF;
LABEL W,B,DELIM,JOIN,FIN,RETRY;
STRING OUTFIL,INFIL,TMPFIL;
BOOLEAN KEEPMULT,FRONT,QUICKSW,JMCSW,CACHESTATS;
INTEGER TOTALTRY,TOTALMISS,TOTALWIN,LASTPAGE,MAXPAGE;COMMENT PAGING STATISTICS;
INTEGER NTHRASH;


COMMENT The following procedure was taken from someone, probably REG;
PROCEDURE SORT(REFERENCE INTEGER ARRAY KEY,DATA;INTEGER LOW,HIGH);
	BEGIN
	DEFINE LESS(A,B) = "(KEY[BASE+A]>KEY[BASE+B])";
	INTEGER BASE,N,J,K,FATHER,SON,SON1,SON2;
	BASE←LOW-1;
	N←HIGH-BASE;
	FOR J ← 2 STEP 1 UNTIL N DO BEGIN
		K ← J;
		FATHER ← K LSH -1;
		WHILE ((FATHER>0)∧LESS(K,FATHER)) DO BEGIN
			DATA[BASE+K] ↔ DATA[BASE+FATHER];
			KEY[BASE+K] ↔KEY[BASE+FATHER];
			K ← FATHER;
			FATHER ← K LSH -1;
			END;
		END;
	FOR J ← N STEP -1 UNTIL 2 DO BEGIN
		DATA[BASE+1] ↔ DATA[BASE+J];
		KEY[BASE+1] ↔ KEY[BASE+J];
		FATHER ← 1;
		SON1← FATHER LSH 1;
		SON← SON2← SON1+1;
		WHILE SON1 < J DO BEGIN
			IF ((SON2 = J) ∨ LESS(SON1,SON2)) THEN SON ← SON1;
    			IF LESS(SON,FATHER) THEN BEGIN;
				DATA[BASE+FATHER] ↔ DATA [BASE+SON];
				KEY[BASE+FATHER] ↔ KEY [BASE+SON];
				FATHER ← SON;
				SON1 ← FATHER LSH 1; SON ← SON2 ← SON1+1;
				END ELSE DONE;
			END;
		END;
	END;

PROCEDURE REPORTSTATS(STRING TITLE);
 OUTSTR("
["&TITLE&", "&CVS(TOTALTRY)&" ACCESSES AS FOLLOWS:  "&
  CVS(TOTALMISS)&" PAGE FAULTS, "&CVS(MAXPAGE)&" PAGES,
  "&CVS(TOTALTRY-TOTALMISS-TOTALWIN)&" SUCCESSES ON SAME PAGE, "&
  CVS(TOTALWIN)&" SUCCESSES ON OTHER CACHE PAGES,
  -- "&CVF((TOTALMISS+TOTALWIN)/(TOTALMISS))&" TIMES AS FAST AS SIMPLE BUFFERING]"&CRLF);

INTEGER PROCEDURE RW(INTEGER ADR);
BEGIN "READ WORD" COMMENT USES A SIMULATED VIRTUAL-MEMORY SYSTEM;
	INTEGER SADR,WADR,I;
	TOTALTRY←TOTALTRY+1;
	CELL←-1;
	SADR←ADR LSH -7;
	IF MAXPAGE≤SADR THEN MAXPAGE←SADR+1;
	WADR←ADR LAND '177;
	FOR I←0 STEP 1 UNTIL SECZ-1 DO IF SECAS[I]=SADR THEN BEGIN "FOUND"
		CELL←I;DONE;END "FOUND";
	IF CELL<0 THEN BEGIN "NOT FOUND"
		CELL←RANDOM(SECZ);
		USETI(DCHAN,SADR+1);
		ARRYIN(DCHAN,SECS[CELL*'200],'200);
		SECAS[CELL]←SADR;
		TOTALMISS←TOTALMISS+1;
		IF TOTALMISS>NTHRASH THEN BEGIN "WARN THRASH"
			IF TOTALMISS>MAXPAGE THEN REPORTSTATS("THRASHING??");
			NTHRASH←(MAXPAGE+NTHRASH)*1.5;
			END "WARN THRASH";
		END "NOT FOUND"
	  ELSE IF LASTPAGE≠SADR THEN TOTALWIN←TOTALWIN+1;
	LASTPAGE←SADR;
	RETURN(SECS[CELL*'200+WADR]);
	END "READ WORD";

PROCEDURE TVKLUG(INTEGER ICHAN);
  COMMENT  See if this file has tv-edit directory or sos line numbers.
    If tv-dir then flush it, if sos line numbers then usererr,
    else useti back to first word of first record;
  BEGIN "TV KLUG"
	LABEL NOTVED;
	INTEGER W,XORW;
	IF (W←WORDIN(ICHAN))≠CVASC("COMME") THEN GO TO NOTVED;
	IF WORDIN(ICHAN)≠CVASC("NT ⊗ ") THEN GO TO NOTVED;
	IF WORDIN(ICHAN)≠CVASC("  VAL") THEN GO TO NOTVED;
	COMMENT There is a tv-edit directory, must go past next ⊗ character;
	XORW←LNOT CVASC("⊗⊗⊗⊗⊗");
	START_CODE "FLUSH"
		DEFINE SUBRET=1,MASK=2,XORWD=3,ADDWD=4;
		LABEL L;
		MOVE XORWD,XORW;
		MOVE ADDWD,['002010040201];
		MOVE MASK,['402010040200];
	L:	PUSH '17,ICHAN;PUSHJ '17,WORDIN;MOVE SUBRET;
		XOR XORWD;LSH -1;MOVEM SUBRET;
		ADD ADDWD;XOR ADDWD;XOR SUBRET;AND MASK;COMMENT See if the bit changed;
		JUMPE L;COMMENT If no jump because a bit was on, the ⊗ was found;
		END "FLUSH";
	OUTSTR("TV-EDIT DIRECTORY HAS BEEN FLUSHED"&CRLF);
	IF TABLE[FF]=0 THEN BEGIN "PUR FF"
		OUTSTR("(WILL PURGE ALL FORM-FEED CHARACTERS ALSO)");
		TABLE[FF]←-1;
		END "PUR FF";
	RETURN;
NOTVED:	IF (W LAND '1)≠0 THEN USERERR(0,1,"SOS LINE NUMBERS, PLEASE COPY /N");
	USETI(ICHAN,1);RETURN;
	END "TV KLUG";

COMMENT MAINLINE BEGINS HERE;

OUTSTR("REM STRING-SORT, USING STANDARD ASCII ALPHABETICAL ORDER
(THIS PROGRAM NOW WARNS YOU IF THE VIRTUAL MEMORY IS THRASHING, AS OF 74.DEC.08)
(ROUTINE PAGING STATISTICS ARE NOW AN OPTIONAL SWITCH, AS OF 74.DEC.22)"&CRLF);
RETRY:FOR I←0 STEP 1 UNTIL '177 DO TABLE[I]←0;
TABLE[CR]←TABLE[FF]←TABLE[VT]←-1;
TABLE[LF]←1;TOTSTR←0;DELIMW←(CR LSH 8) LOR (LF LSH 1);COMMENT <CRLF>;
TMPFIL←CVS(CALL(0,"PJOB"))&"S.TMP";
CACHESTATS←JMCSW←QUICKSW←FALSE;

OPEN(ICHAN←GETCHAN,"DSK",'10,2,0,0,IBRK,IEOF);
WHILE TRUE DO BEGIN "LOOKUP"
	BOOLEAN IFLG;
	INTEGER BR;
	STRING SAVCOM,COM,SW;
	OUTSTR("*");
	SETBREAK(1,"←/","","INS");
	SAVCOM←COM←INCHWL;
	IF EQU(COM,"") OR EQU(COM,"HELP") OR COM="?" THEN BEGIN "HELP ME"
		OUTSTR("
COMMAND FORMAT:
<OUTFILE>←<INFILE><SWITCHES>  OR  <FILE><SWITCHES>

SWITCHES:  <DEFAULT> := /O12/D15/D14/D13/S15&12
/A<CHARACTER> = USE THAT ASCII CHARACTER AS DELIMITER
/O<OCTAL> = CONVERT TO CHARACTER AND USE THAT AS DELIMITER
/B = BLANK LINE AS DELIMITER -- DON'T MIX /B WITH ANY OF /F /D OR /S
  (ANY OF THE ABOVE WILL PURGE ALL PRIOR /D AND /S SWITCHES)
/F = PUT DELIMITER AT FRONT OF EACH STRING (DON'T DO THIS IF <LF> IS DELIM)
/R = RETAIN MULTIPLE COPIES OF IDENTICAL STRINGS
/D<OCTAL> = DELETE ALL OCCURANCES OF THAT CHARACTER
/S<OCTAL>[&<OCTAL>] = REPLACE DELIMITER BY THAT STRING UPON OUTPUT
/P = PAGING STATISTICS TYPED OUT AFTER BOTH MAJOR PASSES
");
		GO TO RETRY;
		END "HELP ME";
	OUTFIL←SCAN(COM,1,BR);
	IF BR="←" THEN INFIL←SCAN(COM,1,BR) ELSE INFIL←OUTFIL;
	WHILE BR="/" DO BEGIN "DECODE SWITCHES"
		LABEL SWEND,SWBAD;
		INTEGER KHANS;
		SW←SCAN(COM,1,BR);
		IF SW="P" THEN BEGIN "CACHE STAT"
			CACHESTATS←TRUE;GO TO SWEND;
			END "CACHE STAT";
		IF SW="B" THEN BEGIN "JMC MODE"
			INTEGER I;
			FOR I←1 STEP 1 UNTIL '177 DO TABLE[I]←0;
			TABLE[LF]←1;
			JMCSW←TRUE;
			DELIMW←LF LSH 1;
			GO TO SWEND;
			END "JMC MODE";
		IF SW="A" OR SW="O" THEN BEGIN "SET DELIMITER"
			INTEGER BRK,I;
			IF SW="A" THEN BRK←SW[2 FOR 1]
			  ELSE BRK←CVO(SW[2 TO ∞]);
			FOR I←1 STEP 1 UNTIL '177 DO TABLE[I]←0;
			TABLE[BRK LAND '177]←1;
			DELIMW←BRK LSH 1;
			GO TO SWEND;
			END "SET DELIMITER";
		IF SW="F" THEN BEGIN "FRONT DELIMITER"
			FRONT←TRUE;GO TO SWEND;
			END "FRONT DELIMITER";
		IF SW="S" THEN BEGIN "SUBST"
			STRING SUB,SCR;
			INTEGER I,KH,BR;
			DELIMW←0;
			SETBREAK(4,"&","","INS");
			SCR←SW[2 TO ∞];
			FOR I←1 STEP 1 UNTIL 5 DO BEGIN "ONE CHAR"
				SUB←SCAN(SCR,4,BR);
				KH←CVO(SUB);
				DELIMW←(DELIMW LSH 7) LOR (KH LSH 1);
				IF BR≠"&" THEN GO TO SWEND;
				END "ONE CHAR";
			OUTSTR("
(TOO MANY CHARACTERS SPECIFIED BY /S, MAXIMUM=5)");
			GO TO SWBAD;
			END "SUBST";
		IF SW="R" THEN BEGIN KEEPMULT←TRUE;GO TO SWEND;END;
		IF SW="D" THEN BEGIN "DELETE"
			INTEGER BRK;
			BRK←CVO(SW[2 TO ∞]);
			IF TABLE[BRK]≠1 THEN TABLE[BRK]←-1
			  ELSE GO TO SWBAD;
			GO TO SWEND;
			END "DELETE";
	SWBAD:	OUTSTR("
π INVALID SWITCH """&SW&""" WANT TO RETYPE IT?");
		WHILE TRUE DO IF (KHANS←(INCHRW LAND '137))="Y" OR KHANS="Z" THEN BEGIN "RETYPE"
			OUTSTR("(OK, TRY THAT SWITCH AGAIN)"&CRLF&"/");
			IF KHANS="Z" THEN PTOSTR(0,SW);
			COM←INCHWL&(IF LENGTH(COM)>0 THEN "/" ELSE "")&COM;
			BR←"/";
			GO TO SWEND;
			END "RETYPE"
		  ELSE IF KHANS="N" THEN DONE
		  ELSE OUTSTR("(Y=YES RETYPE  N=NO IGNORE  Z=REEDIT)");
		OUTSTR("(SWITCH IGNORED)"&CRLF);
	SWEND:	END "DECODE SWITCHES";
	LOOKUP(ICHAN,OUTFIL,IFLG);
	IF NOT IFLG THEN BEGIN "ALREADY EXISTS"
		INTEGER KH;
		CLOSE(ICHAN);
		OUTSTR("OUTPUT FILE ALREADY EXISTS, OK TO REPLACE?");
		WHILE TRUE DO IF (KH←INCHRW LAND '137)="N" OR KH="Z" THEN BEGIN
			OUTSTR(CRLF&"BACK TO TYPE-IN OF COMMAND:"&CRLF);
			IF KH="Z" THEN LODED(SAVCOM);
			GO TO RETRY;END
		  ELSE IF KH="A" THEN DONE
		  ELSE IF KH="Y" THEN BEGIN QUICKSW←TRUE;DONE;END
		  ELSE OUTSTR(CRLF&"(Y=YES / N=NO / A=ASK ME LATER / Z=NO BUT USE Z-MODE RE-EDIT COMMAND) ?");
		OUTSTR(CRLF);
		END "ALREADY EXISTS";
	LOOKUP(ICHAN,INFIL,IFLG);
	IF NOT IFLG THEN DONE;
	OUTSTR("(NO SUCH FILE) "&CRLF);
	END "LOOKUP";
TABLE[0]←-1;COMMENT Nulls absolutely must be ignored;
OPEN(OCHAN←GETCHAN,"DSK",'10,0,2,0,OBRK,OEOF);
ENTER(OCHAN,TMPFIL,OFLG);
IF OFLG THEN USERERR(0,0,"CANNOT ENTER OUTPUT FILE");

BEGIN "PRE PROCESS THE FILE"
	INTEGER THISNW;
	BOOLEAN ANYTEXT;COMMENT Useful for JMC mode to test for blank lines;
	ANYTEXT←FALSE;
	THISNW←LONGNW←0;
	TVKLUG(ICHAN);COMMENT  Get rid of tv-edit directory if it is here;
	OUTSTR("PARSING STRINGS...");
COMMENT *** SAIL STRING SPACE IS UNRELIABLE WHENEVER A GARBAGE-COLLECTION
  OCCURS, THAT IS WHY I UNPACK MY OWN TEXT INSTEAD OF USING THE
  INPUT(CHAN,BRKTAB,BRCHAR) ROUTINES ***;
	OBUF←ON←0;
	W:WORD←WORDIN(ICHAN);
	IF IEOF THEN GO TO DELIM;
	IF (WORD LAND 1)≠0 THEN BEGIN "SOS"
		USERERR(0,1,"
ERROR, FILE HAS LINE NUMBERS, WILL RESULT IN TABS IN THE LINE");
		GO TO W;
		END "SOS";
	IN←5;
	B:START_CODE MOVEI 10,0;MOVE 11,WORD;
		LSHC 10,7;
		MOVEM 10,BYT;MOVEM 11,WORD;
		END;
	IF TABLE[BYT]>0 THEN BEGIN "DELIM"
		IF NOT JMCSW THEN GO TO DELIM;
		  COMMENT In normal mode, the delim has happened;
		IF NOT ANYTEXT THEN GO TO DELIM;COMMENT In JMC mode,
		  there hasn't been any text since the previous delimiter;
		ANYTEXT←FALSE;
		END "DELIM"
	  ELSE IF TABLE[BYT]<0 THEN GO TO JOIN;
	IF BYT≠CR AND BYT≠LF THEN ANYTEXT←TRUE;COMMENT CR doesn't count as text;
	IF ON≥5 THEN BEGIN "OUTPUT"
		WORDOUT(OCHAN,OBUF LAND (-1 LSH -1));
		THISNW←THISNW+1;
		ON←0;END "OUTPUT";
	OBUF←(OBUF LSH 7) LOR BYT;
	ON←ON+1;
	GO TO JOIN;
	DELIM:IF ON>0 THEN BEGIN "PURGE"
		WORDOUT(OCHAN,(OBUF LSH (7*(5-ON))) LAND (-1 LSH -1));
		THISNW←THISNW+1;
		IF THISNW>LONGNW THEN LONGNW←THISNW;
		ON←0;
		WORDOUT(OCHAN,0);
		TOTSTR←TOTSTR+1;
		END "PURGE";
	THISNW←0;
	IF IEOF THEN GO TO FIN;
	JOIN:IF (IN←IN-1)>0 THEN GO TO B ELSE GO TO W;
	FIN:CLOSE(OCHAN);
	OUTSTR("DONE, THERE WERE "&CVS(TOTSTR)&" STRINGS, LARGEST WAS "&CVS(LONGNW)&" WORDS"&CRLF);
	CLOSE(ICHAN);
	END "PRE PROCESS THE FILE";

IF NOT MERGESORT THEN BEGIN "SORT POINTERS"
	INTEGER ARRAY KEY,PAIR[1:TOTSTR];
	INTEGER ADR,INDEX,LOW,HIGH,MASKD,MASKA;
	LABEL TESTL,TESTH,TESTD,FIN;
	MASKD←'777700000000;
	MASKA←'000077777777;
	ADR←0;
	LOOKUP(ICHAN,TMPFIL,IFLG);
	IF IFLG THEN USERERR(0,1,"NO TMP FILE");
	OUTSTR("FIRST-WORD ARRAY, INIT...");
	FOR INDEX←1 STEP 1 UNTIL TOTSTR DO BEGIN "INIT ARRAY"
		KEY[INDEX]←WORDIN(ICHAN);
		PAIR[INDEX]←ADR;
		WHILE TRUE DO BEGIN "FIND ZERO"
			ADR←ADR+1;
			IF WORDIN(ICHAN)=0 THEN DONE;
			IF IEOF THEN USERERR(0,1,"EARLY EOF ICHAN");
			END "FIND ZERO";
		ADR←ADR+1;
		END "INIT ARRAY";
	RELEASE(ICHAN);
	OUTSTR("DONE / SORTING...");
	SORT(KEY,PAIR,1,TOTSTR);
	OUTSTR("DONE"&CRLF);
	FOR INDEX←0 STEP 1 UNTIL SECZ-1 DO SECAS[INDEX]←-1;
	TOTALTRY←TOTALMISS←TOTALWIN←0;LASTPAGE←MAXPAGE←-1;NTHRASH←1000;
	OPEN(DCHAN←GETCHAN,"DSK",'17,0,0,0,DBRK,DEOF);
	LOOKUP(DCHAN,TMPFIL,DFLG);
	IF DFLG THEN USERERR(0,1,"CANNOT OPEN TMPFIL IN DUMP MODE");
	LOW←1;
	OUTSTR("MORE SORTING...");
TESTL:	IF LOW≥TOTSTR THEN GO TO FIN;
	HIGH←LOW;
TESTH:	IF HIGH≥TOTSTR THEN GO TO TESTD;
	IF KEY[HIGH]≠KEY[HIGH+1] THEN GO TO TESTD;
	IF (MASKD LAND PAIR[HIGH])≠(MASKD LAND PAIR[HIGH+1]) THEN GO TO TESTD;
	IF KEY[HIGH]=0 THEN IF KEEPMULT THEN GO TO TESTD ELSE BEGIN "PURGE"
		IF HIGH≠LOW THEN USERERR(0,1,"OOPS, LOW≠HIGH");
		PAIR[LOW]←-1;LOW←LOW+1;END "PURGE";
	HIGH←HIGH+1;
	GO TO TESTH;
TESTD:	IF HIGH≤LOW THEN BEGIN LOW←LOW+1;GO TO TESTL;END;
	IF FALSE THEN OUTSTR("SUBARRAY["&CVS(LOW)&","&CVS(HIGH)&"] REPLACING...");
	FOR INDEX←LOW STEP 1 UNTIL HIGH DO BEGIN "NEXT WORD"
		INTEGER SADR,WADR,P;
		PAIR[INDEX]←P←PAIR[INDEX]+'100000001;
		P←P LAND MASKA;
		KEY[INDEX]←RW(P);
		END "NEXT WORD";
	IF FALSE THEN OUTSTR("DONE // SORTING...");
	SORT(KEY,PAIR,LOW,HIGH);
	IF FALSE THEN OUTSTR("DONE"&CRLF);
	GO TO TESTL;
FIN:	OUTSTR("DONE"&CRLF);
	IF CACHESTATS THEN REPORTSTATS("SO FAR");
	ENTER(OCHAN,TMPFIL,OFLG);
	OUTSTR("COMPILING OUTPUT FILE...");
	FOR INDEX←1 STEP 1 UNTIL TOTSTR DO BEGIN "COPY OUT"
		LABEL NEX;
		INTEGER ARRAY BUF[0:'177];
		INTEGER ADR,SADR,WADR,P;
		P←PAIR[INDEX];
		IF P<0 THEN GO TO NEX;
		ADR←(P LAND '77777777) - (P LSH -24);
		SADR←ADR LSH -7;
		WADR←ADR LAND '177;
		RW(ADR);
		IF FRONT THEN WORDOUT(OCHAN,DELIMW);
		WHILE TRUE DO BEGIN "SCAN STR"
			INTEGER W;
			IF SECAS[CELL]≠SADR THEN USERERR(0,1,"CELL FOULED UP");
			W←SECS[CELL*'200+WADR];
			IF W=0 THEN BEGIN "END OF STRING"
				IF NOT FRONT THEN WORDOUT(OCHAN,DELIMW);
				DONE;END "END OF STRING"
			  ELSE WORDOUT(OCHAN,W LSH 1);
			WADR←WADR+1;ADR←ADR+1;
			IF WADR>'177 THEN BEGIN "NEX SEC"
				WADR←0;SADR←SADR+1;
				RW(ADR);
				END "NEX SEC";
			END "SCAN STR";
	NEX:	END "COPY OUT";
	RELEASE(DCHAN);
	CLOSE(OCHAN);RELEASE(OCHAN);
	OUTSTR("DONE"&CRLF);
	IF CACHESTATS THEN REPORTSTATS("GRAND TOTAL");
	OUTSTR(CRLF);
	PTOSTR(0,"TRA "&OUTFIL&"←"&TMPFIL&"/N"&
	  (IF QUICKSW THEN "/Q" ELSE "")&CRLF);
	OUTSTR("<DO NOT TYPE AHEAD, WAIT FOR EXIT>");
	CALL(0,"EXIT");
	END "SORT POINTERS";
END "SSORT";